Seurat pipeline

getwd()
## [1] "/Users/matianyu/R_project/Seurat_V5"
setwd('/Users/matianyu/R_project/Seurat_V5/')
# prepare workspace
library(Seurat)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(magrittr)
library(dplyr)
# load data
pbmc.data <- Read10X(data.dir = "./filtered_gene_bc_matrices/hg19/")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc
## An object of class Seurat 
## 13714 features across 2700 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)
##  1 layer present: counts
# count matrix
# pbmc[["RNA"]]$counts
# Lets examine a few genes in the first thirty cells
pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
##   [[ suppressing 30 column names 'AAACATACAACCAC-1', 'AAACATTGAGCTAC-1', 'AAACATTGATCAGC-1' ... ]]
##                                                                    
## CD3D  4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2  3 . . . . . 3 4 1 5
## TCL1A . .  . . . . . . 1 . . . . . . . . . . .  . 1 . . . . . . . .
## MS4A1 . 6  . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .
dense.size <- object.size(as.matrix(pbmc.data))
dense.size
## 709591472 bytes
sparse.size <- object.size(pbmc.data)
sparse.size
## 29905192 bytes
dense.size/sparse.size
## 23.7 bytes
# The [[ operator can add columns to object metadata. This is a great place to stash QC stats
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
# Show QC metrics for the first 5 cells
head(pbmc@meta.data, 5)
##                  orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACATACAACCAC-1     pbmc3k       2419          779  3.0177759
## AAACATTGAGCTAC-1     pbmc3k       4903         1352  3.7935958
## AAACATTGATCAGC-1     pbmc3k       3147         1129  0.8897363
## AAACCGTGCTTCCG-1     pbmc3k       2639          960  1.7430845
## AAACCGTGTATGCG-1     pbmc3k        980          521  1.2244898
# Visualize QC metrics as a violin plot
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.

plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc
## An object of class Seurat 
## 13714 features across 2638 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)
##  1 layer present: counts
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
## Normalizing layer: counts
#pbmc[["RNA"]]$counts
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.

all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
# pbmc[["RNA"]]$scale.data
# suggest alter by SCTransform method

pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
## Regressing out percent.mt
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for scale.data
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CST3, TYROBP, LST1, AIF1, FTL, FTH1, LYZ, FCN1, S100A9, TYMP 
##     FCER1G, CFD, LGALS1, LGALS2, SERPINA1, S100A8, CTSS, IFITM3, SPI1, CFP 
##     PSAP, IFI30, COTL1, SAT1, S100A11, NPC2, GRN, LGALS3, GSTP1, PYCARD 
## Negative:  MALAT1, LTB, IL32, IL7R, CD2, B2M, ACAP1, CTSW, STK17A, CD27 
##     CD247, CCL5, GIMAP5, GZMA, AQP3, CST7, TRAF3IP3, SELL, GZMK, HOPX 
##     MAL, MYC, ITM2A, ETS1, LYAR, GIMAP7, KLRG1, NKG7, ZAP70, BEX2 
## PC_ 2 
## Positive:  CD79A, MS4A1, TCL1A, HLA-DQA1, HLA-DQB1, HLA-DRA, LINC00926, CD79B, HLA-DRB1, CD74 
##     HLA-DMA, HLA-DPB1, HLA-DQA2, CD37, HLA-DRB5, HLA-DMB, HLA-DPA1, FCRLA, HVCN1, LTB 
##     BLNK, P2RX5, IGLL5, IRF8, SWAP70, ARHGAP24, FCGR2B, SMIM14, PPP1R14A, C16orf74 
## Negative:  NKG7, PRF1, CST7, GZMA, GZMB, FGFBP2, CTSW, GNLY, B2M, SPON2 
##     CCL4, GZMH, FCGR3A, CCL5, CD247, XCL2, CLIC3, AKR1C3, SRGN, HOPX 
##     TTC38, CTSC, APMAP, S100A4, IGFBP7, ANXA1, ID2, IL32, XCL1, RHOC 
## PC_ 3 
## Positive:  HLA-DQA1, CD79A, CD79B, HLA-DQB1, HLA-DPA1, HLA-DPB1, CD74, MS4A1, HLA-DRB1, HLA-DRA 
##     HLA-DRB5, HLA-DQA2, TCL1A, LINC00926, HLA-DMB, HLA-DMA, CD37, HVCN1, FCRLA, IRF8 
##     PLAC8, BLNK, MALAT1, SMIM14, PLD4, IGLL5, SWAP70, P2RX5, LAT2, FCGR3A 
## Negative:  PPBP, PF4, SDPR, SPARC, GNG11, NRGN, GP9, RGS18, TUBB1, CLU 
##     HIST1H2AC, AP001189.4, ITGA2B, CD9, TMEM40, PTCRA, CA2, ACRBP, MMD, TREML1 
##     NGFRAP1, F13A1, SEPT5, RUFY1, TSC22D1, CMTM5, MPP1, MYL9, RP11-367G6.3, GP1BA 
## PC_ 4 
## Positive:  HLA-DQA1, CD79B, CD79A, MS4A1, HLA-DQB1, CD74, HLA-DPB1, HIST1H2AC, PF4, HLA-DRB1 
##     HLA-DPA1, SDPR, TCL1A, HLA-DQA2, HLA-DRA, PPBP, LINC00926, GNG11, HLA-DRB5, SPARC 
##     GP9, PTCRA, AP001189.4, CA2, CD9, NRGN, RGS18, CLU, TUBB1, GZMB 
## Negative:  VIM, IL7R, S100A6, S100A8, IL32, S100A4, GIMAP7, S100A10, S100A9, MAL 
##     AQP3, CD14, CD2, LGALS2, FYB, GIMAP4, ANXA1, RBP7, CD27, FCN1 
##     LYZ, S100A12, MS4A6A, GIMAP5, S100A11, FOLR3, TRABD2A, AIF1, IL8, IFI6 
## PC_ 5 
## Positive:  GZMB, FGFBP2, S100A8, NKG7, GNLY, CCL4, PRF1, CST7, SPON2, GZMA 
##     GZMH, LGALS2, S100A9, CCL3, XCL2, CD14, CLIC3, CTSW, MS4A6A, S100A12 
##     GSTP1, RBP7, IGFBP7, FOLR3, AKR1C3, TYROBP, CCL5, TTC38, XCL1, APMAP 
## Negative:  LTB, IL7R, CKB, MS4A7, RP11-290F20.3, AQP3, SIGLEC10, VIM, CYTIP, HMOX1 
##     LILRB2, PTGES3, HN1, CD2, CD27, FAM110A, ANXA5, CTD-2006K23.1, MAL, VMO1 
##     CORO1B, TUBA1B, LILRA3, GDI2, TRADD, IL32, ATP1A1, ABRACL, CCDC109B, PPA1
# Examine and visualize PCA results a few different ways
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  CST3, TYROBP, LST1, AIF1, FTL 
## Negative:  MALAT1, LTB, IL32, IL7R, CD2 
## PC_ 2 
## Positive:  CD79A, MS4A1, TCL1A, HLA-DQA1, HLA-DQB1 
## Negative:  NKG7, PRF1, CST7, GZMA, GZMB 
## PC_ 3 
## Positive:  HLA-DQA1, CD79A, CD79B, HLA-DQB1, HLA-DPA1 
## Negative:  PPBP, PF4, SDPR, SPARC, GNG11 
## PC_ 4 
## Positive:  HLA-DQA1, CD79B, CD79A, MS4A1, HLA-DQB1 
## Negative:  VIM, IL7R, S100A6, S100A8, IL32 
## PC_ 5 
## Positive:  GZMB, FGFBP2, S100A8, NKG7, GNLY 
## Negative:  LTB, IL7R, CKB, MS4A7, RP11-290F20.3
VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")

DimPlot(pbmc, reduction = "pca") + NoLegend()

# DimPlot(pbmc, reduction = "pca",group.by = "orig.ident")
DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)

DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)

ElbowPlot(pbmc)

# Method 2 for determin the dimensionality of the dataset
# NOTE: This process can take a long time for big datasets, comment out for expediency. 
pbmc <- JackStraw(pbmc, dims = 25,num.replicate = 100)
pbmc <- ScoreJackStraw(pbmc, dims = 1:25)
JackStrawPlot(pbmc, dims = 1:25)
## Warning: Removed 41439 rows containing missing values or values outside the scale range (`geom_point()`).

pbmc <- FindNeighbors(pbmc, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
pbmc <- FindClusters(pbmc, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2638
## Number of edges: 95905
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8735
## Number of communities: 9
## Elapsed time: 0 seconds
# Look at cluster IDs of the first 5 cells
head(Idents(pbmc), 5)
## AAACATACAACCAC-1 AAACATTGAGCTAC-1 AAACATTGATCAGC-1 AAACCGTGCTTCCG-1 AAACCGTGTATGCG-1 
##                0                3                2                1                6 
## Levels: 0 1 2 3 4 5 6 7 8
pbmc <- RunUMAP(pbmc, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 04:55:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 04:55:01 Read 2638 rows and found 10 numeric columns
## 04:55:01 Using Annoy for neighbor search, n_neighbors = 30
## 04:55:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 04:55:01 Writing NN index file to temp file /var/folders/zk/qm3tj_dj2xs0jr03sj14gb300000gn/T//Rtmpz4Wiya/file17d4256b95815
## 04:55:01 Searching Annoy index using 1 thread, search_k = 3000
## 04:55:02 Annoy recall = 100%
## 04:55:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 04:55:03 Initializing from normalized Laplacian + noise (using RSpectra)
## 04:55:03 Commencing optimization for 500 epochs, with 106310 positive edges
## 04:55:06 Optimization finished
# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
DimPlot(pbmc, reduction = "umap")

DimPlot(pbmc, reduction = "pca")

# pbmc <- RunTSNE(pbmc, dims = 1:10)
# reduction = "tsne"
saveRDS(pbmc, file = "./pbmc_tutorial.rds")
# for single cluster
# find all markers of cluster 2
cluster2.markers <- FindMarkers(pbmc, ident.1 = 2)
head(cluster2.markers, n = 5)
##             p_val avg_log2FC pct.1 pct.2    p_val_adj
## LTB  1.709675e-83   1.330256 0.982 0.647 2.344648e-79
## IL32 5.076510e-83   1.242930 0.947 0.471 6.961926e-79
## LDHB 2.467055e-68   1.044820 0.967 0.615 3.383320e-64
## CD3D 1.817480e-66   1.058609 0.920 0.438 2.492492e-62
## IL7R 8.698894e-61   1.389909 0.744 0.333 1.192966e-56
# design DE group
# find all markers distinguishing cluster 5 from clusters 0 and 3
cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3))
head(cluster5.markers, n = 5)
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj
## FCGR3A        5.972471e-204   6.795991 0.975 0.041 8.190647e-200
## IFITM3        5.671364e-195   6.201036 0.975 0.048 7.777708e-191
## CFD           2.389538e-193   6.081028 0.937 0.038 3.277012e-189
## CD68          1.800066e-189   5.472200 0.925 0.036 2.468611e-185
## RP11-290F20.3 6.852416e-189   6.390800 0.843 0.015 9.397404e-185
# find markers for every cluster compared to all remaining cells, report only the positive
# ones
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
pbmc.markers
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj cluster          gene
## RPS6          2.937899e-145  0.6898595 1.000 0.995 4.029035e-141       0          RPS6
## RPS12         4.537276e-144  0.7396535 1.000 0.991 6.222420e-140       0         RPS12
## RPL32         1.899782e-140  0.6330085 0.999 0.995 2.605361e-136       0         RPL32
## RPS27         2.256206e-137  0.7261693 0.999 0.992 3.094161e-133       0         RPS27
## RPS14         1.833572e-133  0.6367519 1.000 0.994 2.514561e-129       0         RPS14
## RPS25         2.202430e-129  0.7911714 0.997 0.975 3.020412e-125       0         RPS25
## RPL31         9.519837e-121  0.7770798 0.999 0.963 1.305550e-116       0         RPL31
## RPL9          1.093296e-119  0.7735097 0.999 0.970 1.499346e-115       0          RPL9
## RPS3A         1.360323e-116  0.8204878 0.999 0.974 1.865546e-112       0         RPS3A
## RPS3          4.735309e-115  0.6026055 1.000 0.994 6.494002e-111       0          RPS3
## LDHB          5.315619e-114  1.1863137 0.912 0.591 7.289840e-110       0          LDHB
## RPL13         8.785264e-114  0.5558595 1.000 0.996 1.204811e-109       0         RPL13
## RPL3          1.259214e-110  0.6016573 0.997 0.995 1.726886e-106       0          RPL3
## RPL30         1.616151e-110  0.6806377 1.000 0.979 2.216390e-106       0         RPL30
## RPL21         3.071789e-110  0.6770451 0.997 0.991 4.212651e-106       0         RPL21
## RPS15A        1.166374e-108  0.6527358 0.997 0.983 1.599565e-104       0        RPS15A
## RPS27A        3.958327e-106  0.7768403 0.997 0.966 5.428449e-102       0        RPS27A
## RPLP2         3.902958e-103  0.6034904 1.000 0.990  5.352517e-99       0         RPLP2
## EEF1A1         1.093339e-98  0.5369938 0.994 0.991  1.499405e-94       0        EEF1A1
## RPS13          1.074943e-97  0.7129838 0.987 0.961  1.474177e-93       0         RPS13
## RPS18          5.180602e-95  0.5265658 1.000 0.996  7.104678e-91       0         RPS18
## RPL11          8.215087e-93  0.4714148 1.000 0.995  1.126617e-88       0         RPL11
## RPL13A         6.472793e-92  0.4780475 1.000 0.999  8.876788e-88       0        RPL13A
## MALAT1         1.084174e-89  0.6441156 1.000 0.999  1.486836e-85       0        MALAT1
## RPL27A         1.612236e-88  0.5373650 0.999 0.988  2.211020e-84       0        RPL27A
## RPL35A         2.095224e-88  0.5867145 1.000 0.980  2.873390e-84       0        RPL35A
## RPS28          1.289928e-87  0.6408666 0.991 0.969  1.769007e-83       0         RPS28
## RPS23          8.996629e-87  0.5938163 1.000 0.976  1.233798e-82       0         RPS23
## RPS29          2.279008e-86  0.7683730 0.993 0.903  3.125432e-82       0         RPS29
## TPT1           7.840418e-86  0.6067410 0.997 0.982  1.075235e-81       0          TPT1
## RPS20          2.359727e-85  0.7100360 0.991 0.941  3.236130e-81       0         RPS20
## RPL23A         7.505228e-84  0.5568751 1.000 0.988  1.029267e-79       0        RPL23A
## CCR7           1.305705e-83  2.3501488 0.439 0.110  1.790644e-79       0          CCR7
## RPS4X          5.341427e-82  0.5230436 0.997 0.992  7.325233e-78       0         RPS4X
## CD3D           2.613397e-78  1.0628875 0.850 0.403  3.584012e-74       0          CD3D
## RPL5           1.082456e-76  0.6075214 0.988 0.969  1.484480e-72       0          RPL5
## RPL10          2.064950e-75  0.3709084 1.000 0.997  2.831872e-71       0         RPL10
## RPS16          1.223484e-73  0.5411983 0.996 0.982  1.677886e-69       0         RPS16
## RPL19          9.843176e-73  0.4022453 0.999 0.993  1.349893e-68       0         RPL19
## RPL10A         7.091479e-71  0.5470559 0.991 0.985  9.725254e-67       0        RPL10A
## RPSA           2.882226e-68  0.5854662 0.988 0.948  3.952685e-64       0          RPSA
## RPL14          6.585636e-64  0.4447083 0.996 0.985  9.031541e-60       0         RPL14
## RPS10          1.332199e-61  0.5371322 0.991 0.968  1.826978e-57       0         RPS10
## RPS5           2.436058e-61  0.5184481 0.990 0.975  3.340810e-57       0          RPS5
## NPM1           9.738318e-61  0.7971905 0.922 0.797  1.335513e-56       0          NPM1
## RPS15          7.595881e-60  0.4177772 1.000 0.989  1.041699e-55       0         RPS15
## RPL18          1.237903e-57  0.5038895 0.999 0.978  1.697661e-53       0         RPL18
## RPLP0          1.978925e-57  0.5476535 0.988 0.938  2.713897e-53       0         RPLP0
## RPL36          9.837428e-57  0.5491005 0.990 0.948  1.349105e-52       0         RPL36
## CD3E           5.885316e-55  1.0274342 0.731 0.398  8.071122e-51       0          CD3E
## RPL7           2.243682e-53  0.5914250 0.981 0.966  3.076986e-49       0          RPL7
## RPL36A         1.706982e-51  0.6163363 0.965 0.887  2.340956e-47       0        RPL36A
## LEF1           3.911145e-50  2.1066714 0.338 0.104  5.363744e-46       0          LEF1
## RPL4           9.797668e-50  0.5170381 0.994 0.956  1.343652e-45       0          RPL4
## RPS19          4.943950e-49  0.3765976 0.999 0.992  6.780134e-45       0         RPS19
## RPS2           8.998587e-49  0.3366035 1.000 0.995  1.234066e-44       0          RPS2
## RPL17          7.052454e-48  0.4772102 0.977 0.957  9.671735e-44       0         RPL17
## EEF1B2         1.972361e-47  0.5724925 0.945 0.867  2.704896e-43       0        EEF1B2
## NOSIP          2.532206e-47  1.2285845 0.624 0.360  3.472667e-43       0         NOSIP
## RPL22          8.502524e-47  0.7086351 0.913 0.776  1.166036e-42       0         RPL22
## RPS8           8.809197e-47  0.4319487 0.997 0.984  1.208093e-42       0          RPS8
## RPL18A         1.000407e-46  0.3206133 0.999 0.992  1.371958e-42       0        RPL18A
## PRKCQ-AS1      5.109668e-46  2.0421152 0.335 0.109  7.007398e-42       0     PRKCQ-AS1
## PIK3IP1        5.487008e-43  1.5149033 0.438 0.186  7.524882e-39       0       PIK3IP1
## TMEM66         5.573516e-43  0.8245007 0.850 0.698  7.643520e-39       0        TMEM66
## RPS26          5.330005e-42  0.5299071 0.965 0.897  7.309569e-38       0         RPS26
## BTG1           9.098088e-42  0.5963114 0.947 0.858  1.247712e-37       0          BTG1
## JUNB           3.637979e-41  0.7038651 0.913 0.904  4.989125e-37       0          JUNB
## GLTSCR2        4.347251e-41  0.6219340 0.916 0.804  5.961820e-37       0       GLTSCR2
## FHIT           9.171236e-41  2.7283106 0.199 0.040  1.257743e-36       0          FHIT
## IL7R           3.785547e-40  0.9511342 0.613 0.328  5.191499e-36       0          IL7R
## RPL38          9.709762e-40  0.5534875 0.952 0.872  1.331597e-35       0         RPL38
## RPS21          3.778434e-38  0.5389210 0.942 0.838  5.181744e-34       0         RPS21
## RPLP1          1.229862e-37  0.3157365 0.999 0.996  1.686633e-33       0         RPLP1
## CD7            4.521902e-37  0.8346521 0.571 0.295  6.201337e-33       0           CD7
## RPL35          2.614687e-36  0.4065346 0.993 0.964  3.585781e-32       0         RPL35
## RPL12          3.650663e-35  0.2977780 1.000 0.989  5.006519e-31       0         RPL12
## RPL24          8.842800e-35  0.4278003 0.984 0.942  1.212702e-30       0         RPL24
## RPL37          8.721650e-34  0.4374005 0.987 0.942  1.196087e-29       0         RPL37
## TCF7           1.257656e-33  1.3168493 0.390 0.177  1.724750e-29       0          TCF7
## MAL            7.989692e-33  1.8871824 0.263 0.088  1.095706e-28       0           MAL
## C6orf48        2.257274e-32  0.9450448 0.699 0.491  3.095626e-28       0       C6orf48
## RPL34          1.934954e-31  0.6268241 0.931 0.902  2.653596e-27       0         RPL34
## RPSAP58        1.586125e-30  0.5962843 0.848 0.696  2.175212e-26       0       RPSAP58
## RPL27          2.169278e-30  0.3826747 0.977 0.951  2.974948e-26       0         RPL27
## NELL2          6.989473e-30  2.1982153 0.156 0.033  9.585363e-26       0         NELL2
## RPL37A         8.086232e-30  0.3776142 0.986 0.945  1.108946e-25       0        RPL37A
## CD27           1.582455e-29  0.9705380 0.399 0.183  2.170179e-25       0          CD27
## RPS4Y1         3.345104e-29  0.4904763 0.939 0.869  4.587476e-25       0        RPS4Y1
## LTB            4.574471e-29  0.4311664 0.903 0.633  6.273430e-25       0           LTB
## RPL6           1.421552e-28  0.3022301 0.999 0.992  1.949516e-24       0          RPL6
## LINC00176      8.346010e-28  2.5390247 0.146 0.031  1.144572e-23       0     LINC00176
## RPL7A          1.703449e-27  0.3293995 0.987 0.975  2.336110e-23       0         RPL7A
## LDLRAP1        2.865661e-27  2.3083384 0.240 0.087  3.929967e-23       0       LDLRAP1
## LEPROTL1       3.988222e-27  1.1531127 0.405 0.208  5.469447e-23       0      LEPROTL1
## TRABD2A        6.169276e-27  2.2714828 0.197 0.060  8.460545e-23       0       TRABD2A
## RPL15          2.248364e-25  0.2783623 0.996 0.996  3.083407e-21       0         RPL15
## SH3YL1         1.113838e-23  1.7373622 0.197 0.065  1.527517e-19       0        SH3YL1
## ADTRP          1.463116e-23  3.0056072 0.101 0.016  2.006517e-19       0         ADTRP
## AES            2.672638e-23  0.6863009 0.645 0.439  3.665256e-19       0           AES
## OXNAD1         1.164692e-22  1.7131223 0.217 0.082  1.597259e-18       0        OXNAD1
## RGCC           3.050476e-22  1.0887431 0.360 0.184  4.183423e-18       0          RGCC
## FLT3LG         4.101801e-21  1.4036755 0.282 0.134  5.625210e-17       0        FLT3LG
## RPS7           5.825889e-21  0.2832534 0.997 0.987  7.989625e-17       0          RPS7
## RPL29          9.492159e-21  0.2462272 0.994 0.984  1.301755e-16       0         RPL29
## EPHX2          1.518260e-20  2.2132803 0.121 0.030  2.082142e-16       0         EPHX2
## RPS9           1.774342e-20  0.2657734 0.999 0.986  2.433332e-16       0          RPS9
## RHOH           2.313540e-20  1.0868955 0.376 0.206  3.172789e-16       0          RHOH
## C12orf57       4.612279e-20  1.1083841 0.426 0.257  6.325279e-16       0      C12orf57
## RPL26          5.113798e-20  0.2416266 0.996 0.991  7.013063e-16       0         RPL26
## TMEM123        5.544099e-20  0.7526939 0.549 0.384  7.603177e-16       0       TMEM123
## NDFIP1         6.360731e-20  1.0778784 0.444 0.283  8.723107e-16       0        NDFIP1
## LCK            1.276374e-19  0.7912599 0.500 0.312  1.750420e-15       0           LCK
## RPL28          2.327206e-19  0.1962027 1.000 0.993  3.191530e-15       0         RPL28
## C14orf64       7.044380e-19  2.1687455 0.118 0.031  9.660663e-15       0      C14orf64
## HNRNPA1        1.189733e-18  0.3177602 0.961 0.918  1.631600e-14       0       HNRNPA1
## TOMM7          1.280213e-18  0.4789868 0.841 0.757  1.755684e-14       0         TOMM7
## GIMAP5         2.435741e-18  0.7639453 0.408 0.243  3.340375e-14       0        GIMAP5
## CD3G           2.478344e-18  0.9408833 0.335 0.178  3.398801e-14       0          CD3G
## NGFRAP1        2.739478e-18  1.0610024 0.172 0.062  3.756921e-14       0       NGFRAP1
## SCGB3A1        3.277375e-18  2.4404345 0.114 0.030  4.494593e-14       0       SCGB3A1
## IL32           7.367129e-18  0.2280903 0.772 0.474  1.010328e-13       0          IL32
## RP11-664D1.1   7.448676e-18  2.1001652 0.121 0.034  1.021511e-13       0  RP11-664D1.1
## HINT1          7.543242e-18  0.4675713 0.741 0.606  1.034480e-13       0         HINT1
## APBA2          1.376412e-17  2.2130728 0.114 0.031  1.887611e-13       0         APBA2
## SELL           3.822182e-17  0.6231188 0.595 0.429  5.241740e-13       0          SELL
## CD8B           5.024126e-17  1.4163830 0.215 0.096  6.890086e-13       0          CD8B
## NUCB2          1.057363e-16  1.3533374 0.201 0.086  1.450067e-12       0         NUCB2
## RGS10          2.710405e-16  0.6854698 0.527 0.377  3.717050e-12       0         RGS10
## TSHZ2          3.315123e-16  2.4487436 0.092 0.022  4.546360e-12       0         TSHZ2
## RP11-291B21.2  7.275789e-16  2.4564465 0.087 0.020  9.978017e-12       0 RP11-291B21.2
## MYC            1.834328e-15  1.0705858 0.236 0.116  2.515598e-11       0           MYC
## ACTN1          2.834538e-15  1.1963538 0.142 0.050  3.887285e-11       0         ACTN1
## GYPC           3.189151e-15  0.8552184 0.397 0.255  4.373602e-11       0          GYPC
## EIF4A2         3.707051e-15  0.5340309 0.738 0.606  5.083849e-11       0        EIF4A2
## AAK1           3.872967e-15  1.2490141 0.231 0.115  5.311386e-11       0          AAK1
## TRAT1          4.807266e-15  1.0646944 0.264 0.141  6.592684e-11       0         TRAT1
## RPL8           7.486744e-15  0.2331500 0.993 0.986  1.026732e-10       0          RPL8
## GNB2L1         7.876619e-15  0.2572562 0.990 0.977  1.080200e-10       0        GNB2L1
## DNAJB1         2.370874e-14  0.9701762 0.390 0.257  3.251416e-10       0        DNAJB1
## BEX2           6.175741e-14  1.0635440 0.184 0.081  8.469411e-10       0          BEX2
## CD6            1.137212e-13  1.2797712 0.185 0.084  1.559572e-09       0           CD6
##  [ reached 'max' / getOption("max.print") -- omitted 11564 rows ]
pbmc.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
## # A tibble: 7,024 × 7
## # Groups:   cluster [9]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene     
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>    
##  1 5.32e-114       1.19 0.912 0.591 7.29e-110 0       LDHB     
##  2 1.31e- 83       2.35 0.439 0.11  1.79e- 79 0       CCR7     
##  3 2.61e- 78       1.06 0.85  0.403 3.58e- 74 0       CD3D     
##  4 5.89e- 55       1.03 0.731 0.398 8.07e- 51 0       CD3E     
##  5 3.91e- 50       2.11 0.338 0.104 5.36e- 46 0       LEF1     
##  6 2.53e- 47       1.23 0.624 0.36  3.47e- 43 0       NOSIP    
##  7 5.11e- 46       2.04 0.335 0.109 7.01e- 42 0       PRKCQ-AS1
##  8 5.49e- 43       1.51 0.438 0.186 7.52e- 39 0       PIK3IP1  
##  9 9.17e- 41       2.73 0.199 0.04  1.26e- 36 0       FHIT     
## 10 1.26e- 33       1.32 0.39  0.177 1.72e- 29 0       TCF7     
## # ℹ 7,014 more rows
pbmc.markers
##                       p_val avg_log2FC pct.1 pct.2     p_val_adj cluster          gene
## RPS6          2.937899e-145  0.6898595 1.000 0.995 4.029035e-141       0          RPS6
## RPS12         4.537276e-144  0.7396535 1.000 0.991 6.222420e-140       0         RPS12
## RPL32         1.899782e-140  0.6330085 0.999 0.995 2.605361e-136       0         RPL32
## RPS27         2.256206e-137  0.7261693 0.999 0.992 3.094161e-133       0         RPS27
## RPS14         1.833572e-133  0.6367519 1.000 0.994 2.514561e-129       0         RPS14
## RPS25         2.202430e-129  0.7911714 0.997 0.975 3.020412e-125       0         RPS25
## RPL31         9.519837e-121  0.7770798 0.999 0.963 1.305550e-116       0         RPL31
## RPL9          1.093296e-119  0.7735097 0.999 0.970 1.499346e-115       0          RPL9
## RPS3A         1.360323e-116  0.8204878 0.999 0.974 1.865546e-112       0         RPS3A
## RPS3          4.735309e-115  0.6026055 1.000 0.994 6.494002e-111       0          RPS3
## LDHB          5.315619e-114  1.1863137 0.912 0.591 7.289840e-110       0          LDHB
## RPL13         8.785264e-114  0.5558595 1.000 0.996 1.204811e-109       0         RPL13
## RPL3          1.259214e-110  0.6016573 0.997 0.995 1.726886e-106       0          RPL3
## RPL30         1.616151e-110  0.6806377 1.000 0.979 2.216390e-106       0         RPL30
## RPL21         3.071789e-110  0.6770451 0.997 0.991 4.212651e-106       0         RPL21
## RPS15A        1.166374e-108  0.6527358 0.997 0.983 1.599565e-104       0        RPS15A
## RPS27A        3.958327e-106  0.7768403 0.997 0.966 5.428449e-102       0        RPS27A
## RPLP2         3.902958e-103  0.6034904 1.000 0.990  5.352517e-99       0         RPLP2
## EEF1A1         1.093339e-98  0.5369938 0.994 0.991  1.499405e-94       0        EEF1A1
## RPS13          1.074943e-97  0.7129838 0.987 0.961  1.474177e-93       0         RPS13
## RPS18          5.180602e-95  0.5265658 1.000 0.996  7.104678e-91       0         RPS18
## RPL11          8.215087e-93  0.4714148 1.000 0.995  1.126617e-88       0         RPL11
## RPL13A         6.472793e-92  0.4780475 1.000 0.999  8.876788e-88       0        RPL13A
## MALAT1         1.084174e-89  0.6441156 1.000 0.999  1.486836e-85       0        MALAT1
## RPL27A         1.612236e-88  0.5373650 0.999 0.988  2.211020e-84       0        RPL27A
## RPL35A         2.095224e-88  0.5867145 1.000 0.980  2.873390e-84       0        RPL35A
## RPS28          1.289928e-87  0.6408666 0.991 0.969  1.769007e-83       0         RPS28
## RPS23          8.996629e-87  0.5938163 1.000 0.976  1.233798e-82       0         RPS23
## RPS29          2.279008e-86  0.7683730 0.993 0.903  3.125432e-82       0         RPS29
## TPT1           7.840418e-86  0.6067410 0.997 0.982  1.075235e-81       0          TPT1
## RPS20          2.359727e-85  0.7100360 0.991 0.941  3.236130e-81       0         RPS20
## RPL23A         7.505228e-84  0.5568751 1.000 0.988  1.029267e-79       0        RPL23A
## CCR7           1.305705e-83  2.3501488 0.439 0.110  1.790644e-79       0          CCR7
## RPS4X          5.341427e-82  0.5230436 0.997 0.992  7.325233e-78       0         RPS4X
## CD3D           2.613397e-78  1.0628875 0.850 0.403  3.584012e-74       0          CD3D
## RPL5           1.082456e-76  0.6075214 0.988 0.969  1.484480e-72       0          RPL5
## RPL10          2.064950e-75  0.3709084 1.000 0.997  2.831872e-71       0         RPL10
## RPS16          1.223484e-73  0.5411983 0.996 0.982  1.677886e-69       0         RPS16
## RPL19          9.843176e-73  0.4022453 0.999 0.993  1.349893e-68       0         RPL19
## RPL10A         7.091479e-71  0.5470559 0.991 0.985  9.725254e-67       0        RPL10A
## RPSA           2.882226e-68  0.5854662 0.988 0.948  3.952685e-64       0          RPSA
## RPL14          6.585636e-64  0.4447083 0.996 0.985  9.031541e-60       0         RPL14
## RPS10          1.332199e-61  0.5371322 0.991 0.968  1.826978e-57       0         RPS10
## RPS5           2.436058e-61  0.5184481 0.990 0.975  3.340810e-57       0          RPS5
## NPM1           9.738318e-61  0.7971905 0.922 0.797  1.335513e-56       0          NPM1
## RPS15          7.595881e-60  0.4177772 1.000 0.989  1.041699e-55       0         RPS15
## RPL18          1.237903e-57  0.5038895 0.999 0.978  1.697661e-53       0         RPL18
## RPLP0          1.978925e-57  0.5476535 0.988 0.938  2.713897e-53       0         RPLP0
## RPL36          9.837428e-57  0.5491005 0.990 0.948  1.349105e-52       0         RPL36
## CD3E           5.885316e-55  1.0274342 0.731 0.398  8.071122e-51       0          CD3E
## RPL7           2.243682e-53  0.5914250 0.981 0.966  3.076986e-49       0          RPL7
## RPL36A         1.706982e-51  0.6163363 0.965 0.887  2.340956e-47       0        RPL36A
## LEF1           3.911145e-50  2.1066714 0.338 0.104  5.363744e-46       0          LEF1
## RPL4           9.797668e-50  0.5170381 0.994 0.956  1.343652e-45       0          RPL4
## RPS19          4.943950e-49  0.3765976 0.999 0.992  6.780134e-45       0         RPS19
## RPS2           8.998587e-49  0.3366035 1.000 0.995  1.234066e-44       0          RPS2
## RPL17          7.052454e-48  0.4772102 0.977 0.957  9.671735e-44       0         RPL17
## EEF1B2         1.972361e-47  0.5724925 0.945 0.867  2.704896e-43       0        EEF1B2
## NOSIP          2.532206e-47  1.2285845 0.624 0.360  3.472667e-43       0         NOSIP
## RPL22          8.502524e-47  0.7086351 0.913 0.776  1.166036e-42       0         RPL22
## RPS8           8.809197e-47  0.4319487 0.997 0.984  1.208093e-42       0          RPS8
## RPL18A         1.000407e-46  0.3206133 0.999 0.992  1.371958e-42       0        RPL18A
## PRKCQ-AS1      5.109668e-46  2.0421152 0.335 0.109  7.007398e-42       0     PRKCQ-AS1
## PIK3IP1        5.487008e-43  1.5149033 0.438 0.186  7.524882e-39       0       PIK3IP1
## TMEM66         5.573516e-43  0.8245007 0.850 0.698  7.643520e-39       0        TMEM66
## RPS26          5.330005e-42  0.5299071 0.965 0.897  7.309569e-38       0         RPS26
## BTG1           9.098088e-42  0.5963114 0.947 0.858  1.247712e-37       0          BTG1
## JUNB           3.637979e-41  0.7038651 0.913 0.904  4.989125e-37       0          JUNB
## GLTSCR2        4.347251e-41  0.6219340 0.916 0.804  5.961820e-37       0       GLTSCR2
## FHIT           9.171236e-41  2.7283106 0.199 0.040  1.257743e-36       0          FHIT
## IL7R           3.785547e-40  0.9511342 0.613 0.328  5.191499e-36       0          IL7R
## RPL38          9.709762e-40  0.5534875 0.952 0.872  1.331597e-35       0         RPL38
## RPS21          3.778434e-38  0.5389210 0.942 0.838  5.181744e-34       0         RPS21
## RPLP1          1.229862e-37  0.3157365 0.999 0.996  1.686633e-33       0         RPLP1
## CD7            4.521902e-37  0.8346521 0.571 0.295  6.201337e-33       0           CD7
## RPL35          2.614687e-36  0.4065346 0.993 0.964  3.585781e-32       0         RPL35
## RPL12          3.650663e-35  0.2977780 1.000 0.989  5.006519e-31       0         RPL12
## RPL24          8.842800e-35  0.4278003 0.984 0.942  1.212702e-30       0         RPL24
## RPL37          8.721650e-34  0.4374005 0.987 0.942  1.196087e-29       0         RPL37
## TCF7           1.257656e-33  1.3168493 0.390 0.177  1.724750e-29       0          TCF7
## MAL            7.989692e-33  1.8871824 0.263 0.088  1.095706e-28       0           MAL
## C6orf48        2.257274e-32  0.9450448 0.699 0.491  3.095626e-28       0       C6orf48
## RPL34          1.934954e-31  0.6268241 0.931 0.902  2.653596e-27       0         RPL34
## RPSAP58        1.586125e-30  0.5962843 0.848 0.696  2.175212e-26       0       RPSAP58
## RPL27          2.169278e-30  0.3826747 0.977 0.951  2.974948e-26       0         RPL27
## NELL2          6.989473e-30  2.1982153 0.156 0.033  9.585363e-26       0         NELL2
## RPL37A         8.086232e-30  0.3776142 0.986 0.945  1.108946e-25       0        RPL37A
## CD27           1.582455e-29  0.9705380 0.399 0.183  2.170179e-25       0          CD27
## RPS4Y1         3.345104e-29  0.4904763 0.939 0.869  4.587476e-25       0        RPS4Y1
## LTB            4.574471e-29  0.4311664 0.903 0.633  6.273430e-25       0           LTB
## RPL6           1.421552e-28  0.3022301 0.999 0.992  1.949516e-24       0          RPL6
## LINC00176      8.346010e-28  2.5390247 0.146 0.031  1.144572e-23       0     LINC00176
## RPL7A          1.703449e-27  0.3293995 0.987 0.975  2.336110e-23       0         RPL7A
## LDLRAP1        2.865661e-27  2.3083384 0.240 0.087  3.929967e-23       0       LDLRAP1
## LEPROTL1       3.988222e-27  1.1531127 0.405 0.208  5.469447e-23       0      LEPROTL1
## TRABD2A        6.169276e-27  2.2714828 0.197 0.060  8.460545e-23       0       TRABD2A
## RPL15          2.248364e-25  0.2783623 0.996 0.996  3.083407e-21       0         RPL15
## SH3YL1         1.113838e-23  1.7373622 0.197 0.065  1.527517e-19       0        SH3YL1
## ADTRP          1.463116e-23  3.0056072 0.101 0.016  2.006517e-19       0         ADTRP
## AES            2.672638e-23  0.6863009 0.645 0.439  3.665256e-19       0           AES
## OXNAD1         1.164692e-22  1.7131223 0.217 0.082  1.597259e-18       0        OXNAD1
## RGCC           3.050476e-22  1.0887431 0.360 0.184  4.183423e-18       0          RGCC
## FLT3LG         4.101801e-21  1.4036755 0.282 0.134  5.625210e-17       0        FLT3LG
## RPS7           5.825889e-21  0.2832534 0.997 0.987  7.989625e-17       0          RPS7
## RPL29          9.492159e-21  0.2462272 0.994 0.984  1.301755e-16       0         RPL29
## EPHX2          1.518260e-20  2.2132803 0.121 0.030  2.082142e-16       0         EPHX2
## RPS9           1.774342e-20  0.2657734 0.999 0.986  2.433332e-16       0          RPS9
## RHOH           2.313540e-20  1.0868955 0.376 0.206  3.172789e-16       0          RHOH
## C12orf57       4.612279e-20  1.1083841 0.426 0.257  6.325279e-16       0      C12orf57
## RPL26          5.113798e-20  0.2416266 0.996 0.991  7.013063e-16       0         RPL26
## TMEM123        5.544099e-20  0.7526939 0.549 0.384  7.603177e-16       0       TMEM123
## NDFIP1         6.360731e-20  1.0778784 0.444 0.283  8.723107e-16       0        NDFIP1
## LCK            1.276374e-19  0.7912599 0.500 0.312  1.750420e-15       0           LCK
## RPL28          2.327206e-19  0.1962027 1.000 0.993  3.191530e-15       0         RPL28
## C14orf64       7.044380e-19  2.1687455 0.118 0.031  9.660663e-15       0      C14orf64
## HNRNPA1        1.189733e-18  0.3177602 0.961 0.918  1.631600e-14       0       HNRNPA1
## TOMM7          1.280213e-18  0.4789868 0.841 0.757  1.755684e-14       0         TOMM7
## GIMAP5         2.435741e-18  0.7639453 0.408 0.243  3.340375e-14       0        GIMAP5
## CD3G           2.478344e-18  0.9408833 0.335 0.178  3.398801e-14       0          CD3G
## NGFRAP1        2.739478e-18  1.0610024 0.172 0.062  3.756921e-14       0       NGFRAP1
## SCGB3A1        3.277375e-18  2.4404345 0.114 0.030  4.494593e-14       0       SCGB3A1
## IL32           7.367129e-18  0.2280903 0.772 0.474  1.010328e-13       0          IL32
## RP11-664D1.1   7.448676e-18  2.1001652 0.121 0.034  1.021511e-13       0  RP11-664D1.1
## HINT1          7.543242e-18  0.4675713 0.741 0.606  1.034480e-13       0         HINT1
## APBA2          1.376412e-17  2.2130728 0.114 0.031  1.887611e-13       0         APBA2
## SELL           3.822182e-17  0.6231188 0.595 0.429  5.241740e-13       0          SELL
## CD8B           5.024126e-17  1.4163830 0.215 0.096  6.890086e-13       0          CD8B
## NUCB2          1.057363e-16  1.3533374 0.201 0.086  1.450067e-12       0         NUCB2
## RGS10          2.710405e-16  0.6854698 0.527 0.377  3.717050e-12       0         RGS10
## TSHZ2          3.315123e-16  2.4487436 0.092 0.022  4.546360e-12       0         TSHZ2
## RP11-291B21.2  7.275789e-16  2.4564465 0.087 0.020  9.978017e-12       0 RP11-291B21.2
## MYC            1.834328e-15  1.0705858 0.236 0.116  2.515598e-11       0           MYC
## ACTN1          2.834538e-15  1.1963538 0.142 0.050  3.887285e-11       0         ACTN1
## GYPC           3.189151e-15  0.8552184 0.397 0.255  4.373602e-11       0          GYPC
## EIF4A2         3.707051e-15  0.5340309 0.738 0.606  5.083849e-11       0        EIF4A2
## AAK1           3.872967e-15  1.2490141 0.231 0.115  5.311386e-11       0          AAK1
## TRAT1          4.807266e-15  1.0646944 0.264 0.141  6.592684e-11       0         TRAT1
## RPL8           7.486744e-15  0.2331500 0.993 0.986  1.026732e-10       0          RPL8
## GNB2L1         7.876619e-15  0.2572562 0.990 0.977  1.080200e-10       0        GNB2L1
## DNAJB1         2.370874e-14  0.9701762 0.390 0.257  3.251416e-10       0        DNAJB1
## BEX2           6.175741e-14  1.0635440 0.184 0.081  8.469411e-10       0          BEX2
## CD6            1.137212e-13  1.2797712 0.185 0.084  1.559572e-09       0           CD6
##  [ reached 'max' / getOption("max.print") -- omitted 11564 rows ]
cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)
VlnPlot(pbmc, features = c("MS4A1", "CD79A"))

# you can plot raw counts as well
VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = "counts", log = TRUE)
## Warning: The `slot` argument of `VlnPlot()` is deprecated as of Seurat 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP",
    "CD8A"))

pbmc.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10

DoHeatmap(pbmc, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(pbmc, features = top10$gene): The following features were omitted as they were not found in the scale.data slot for the
## RNA assay: PKIB, KLRD1, PILRA, MS4A4A, HES4, CDKN1C, CD8A, VPREB3, TRAT1, TNFRSF4, TCF7, FHIT, PIK3IP1, PRKCQ-AS1, NOSIP, LEF1, CD3E, CD3D,
## CCR7, LDHB

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
    "NK", "DC", "Platelet")

names(new.cluster.ids) <- levels(pbmc)

pbmc <- RenameIdents(pbmc, new.cluster.ids)


# pbmc <- StashIdent(pbmc, save.name = "original_cluster")

# 恢复原始 cluster
#Idents(pbmc) <- pbmc@meta.data$original_cluster


DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

head(pbmc@meta.data)
##                  orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.5 seurat_clusters
## AAACATACAACCAC-1     pbmc3k       2419          779  3.0177759               0               0
## AAACATTGAGCTAC-1     pbmc3k       4903         1352  3.7935958               3               3
## AAACATTGATCAGC-1     pbmc3k       3147         1129  0.8897363               2               2
## AAACCGTGCTTCCG-1     pbmc3k       2639          960  1.7430845               1               1
## AAACCGTGTATGCG-1     pbmc3k        980          521  1.2244898               6               6
## AAACGCACTGGTAC-1     pbmc3k       2163          781  1.6643551               2               2
plot <- DimPlot(pbmc, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") +
    theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10)))
ggsave(filename = "./images/pbmc3k_umap.jpg", height = 7, width = 12, plot = plot, quality = 50)
saveRDS(pbmc, file = "./pbmc3k_final.rds")
# test classical marker gene expression cluster

FeaturePlot(pbmc, features = c("IL7R","CCR7","CD14","LYZ","S100A4","MS4A1","CD8A","FCGR3A","MS4A7","GNLY","NKG7","FCER1A","CST3","PPBP"))

pbmc
## An object of class Seurat 
## 13714 features across 2638 samples within 1 assay 
## Active assay: RNA (13714 features, 2000 variable features)
##  3 layers present: counts, data, scale.data
##  2 dimensional reductions calculated: pca, umap
# Visulization part
# 随机分组 size = ncol(pbmc3k.final) -> 抽样总数为细胞数量
pbmc$groups <- sample(c("group1", "group2"), size = ncol(pbmc), replace = TRUE)

# 可视化关键marker基因
features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4")
# Ridge plots - from ggridges. Visualize single cell expression distributions in each cluster
RidgePlot(pbmc, features = features, ncol = 2)
## Picking joint bandwidth of 0.318
## Picking joint bandwidth of 0.174
## Picking joint bandwidth of 0.161
## Picking joint bandwidth of 0.152
## Picking joint bandwidth of 0.0572
## Picking joint bandwidth of 0.0298

# Violin plot - Visualize single cell expression distributions in each cluster
VlnPlot(pbmc, features = features)

# Feature plot - visualize feature expression in low-dimensional space
FeaturePlot(pbmc, features = features)

# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
DotPlot(pbmc, features = features) + RotatedAxis()

# Single cell heatmap of feature expression
DoHeatmap(subset(pbmc, downsample = 100), features = features, size = 3)
## Warning in DoHeatmap(subset(pbmc, downsample = 100), features = features, : The following features were omitted as they were not found in the
## scale.data slot for the RNA assay: PTPRCAP

# Plot a legend to map colors to expression levels
FeaturePlot(pbmc, features = "MS4A1")

# Adjust the contrast in the plot
FeaturePlot(pbmc, features = "MS4A1", min.cutoff = 1, max.cutoff = 3)

# Calculate feature-specific contrast levels based on quantiles of non-zero expression.
# Particularly useful when plotting multiple markers
FeaturePlot(pbmc, features = c("MS4A1", "PTPRCAP"), min.cutoff = "q10", max.cutoff = "q90")

# Visualize co-expression of two features simultaneously
FeaturePlot(pbmc, features = c("MS4A1", "CD79A"), blend = TRUE)

# Split visualization to view expression by groups (replaces FeatureHeatmap)
FeaturePlot(pbmc, features = c("MS4A1", "CD79A"), split.by = "groups")& scale_color_viridis_c()
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# Violin plots can also be split on some variable. Simply add the splitting variable to object
# metadata and pass it to the split.by argument
VlnPlot(pbmc, features = "percent.mt", split.by = "groups")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

# SplitDotPlotGG has been replaced with the `split.by` parameter for DotPlot
DotPlot(pbmc, features = features, split.by = "groups") + RotatedAxis()

# DoHeatmap now shows a grouping bar, splitting the heatmap into groups or clusters. This can
# be changed with the `group.by` parameter
DoHeatmap(pbmc, features = VariableFeatures(pbmc)[1:100], cells = 1:500, size = 4,
    angle = 90) + NoLegend()

# Step for Moncle3 pseodutime analysis
library(monocle3)

pbmc <- readRDS("./output/pbmc3k_final.rds")

DimPlot(pbmc, reduction = "umap")

# 查看 Seurat 对象的所有 assay
names(pbmc)
## [1] "RNA"     "RNA_nn"  "RNA_snn" "pca"     "umap"
# meta
colnames(pbmc@meta.data)
## [1] "orig.ident"      "nCount_RNA"      "nFeature_RNA"    "percent.mt"      "RNA_snn_res.0.5" "seurat_clusters"
pbmc.cds <- as.cell_data_set(pbmc)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does not calculate. Please run 'cluster_cells' on your cell_data_set
## object
pbmc.cds <- cluster_cells(cds = pbmc.cds, reduction_method = "UMAP")
p1 <- plot_cells(pbmc.cds, show_trajectory_graph = FALSE)
p2 <- plot_cells(pbmc.cds, color_cells_by = "partition", show_trajectory_graph = FALSE)
wrap_plots(p1, p2)

pbmc.cds <- learn_graph(pbmc.cds, use_partition = TRUE)
##   |                                                                                                                                             |                                                                                                                                     |   0%  |                                                                                                                                             |=====================================================================================================================================| 100%
##   |                                                                                                                                             |                                                                                                                                     |   0%  |                                                                                                                                             |=====================================================================================================================================| 100%
##   |                                                                                                                                             |                                                                                                                                     |   0%  |                                                                                                                                             |=====================================================================================================================================| 100%
plot_cells(pbmc.cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = FALSE)

plot_cells(pbmc.cds, label_groups_by_cluster = FALSE, label_leaves = FALSE, label_branch_points = TRUE, color_cells_by = 'seurat_clusters')

pbmc.cds <- order_cells(pbmc.cds, reduction_method = "UMAP")
## 
## Listening on http://127.0.0.1:4504
# root_cell for curate initial cellList
# plot trajectories colored by pseudotime
plot_cells(
  cds = pbmc.cds,
  color_cells_by = "pseudotime",
  show_trajectory_graph = TRUE
)
## Cells aren't colored in a way that allows them to be grouped.

# 提取伪时间值并添加到 Seurat 对象
pbmc <- AddMetaData(
  object = pbmc,
  metadata = pbmc.cds@principal_graph_aux@listData$UMAP$pseudotime,
  col.name = "PBMC"
)
FeaturePlot(pbmc, c("PBMC"), pt.size = 0.1) & scale_color_viridis_c()
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# Slingshot

library(slingshot)
library(SingleCellExperiment)
library(RColorBrewer)
# seurat对象数据格式转换
sce <- as.SingleCellExperiment(pbmc)
sce
## class: SingleCellExperiment 
## dim: 13714 2638 
## metadata(0):
## assays(2): counts logcounts
## rownames(13714): AL627309.1 AP006222.2 ... PNRC2.1 SRSF10.1
## rowData names(0):
## colnames(2638): AAACATACAACCAC-1 AAACATTGAGCTAC-1 ... TTTGCATGAGAGGC-1 TTTGCATGCCTCAC-1
## colData names(8): orig.ident nCount_RNA ... PBMC ident
## reducedDimNames(2): PCA UMAP
## mainExpName: RNA
## altExpNames(0):
# slingshot分析
sce <- slingshot(data = sce, 
                 clusterLabels = 'seurat_clusters',
                 reducedDim = 'UMAP',
                 start.clus = NULL, # 可指定起点亚群
                 end.clus = NULL # 可指定终点亚群
                 )
colnames(colData(sce))
##  [1] "orig.ident"        "nCount_RNA"        "nFeature_RNA"      "percent.mt"        "RNA_snn_res.0.5"   "seurat_clusters"  
##  [7] "PBMC"              "ident"             "slingshot"         "slingPseudotime_1" "slingPseudotime_2" "slingPseudotime_3"
## [13] "slingPseudotime_4"
summary(sce$slingPseudotime_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   4.420   8.453  10.279  10.795  25.716    1323
pal <- c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))

# 通过“type”参数查看基于聚类的最小生成树最初是如何估计谱系结构的
plot(reducedDims(sce)$UMAP,col = pal[sce$seurat_clusters],cex = 0.5, pch=16, asp=1)

#lin1 <- getLineages(sce, 
#                    clusterLabels = "seurat_clusters", 
                    #start.clus = 'Pi16',#可指定起始细胞簇
                    #end.clus=c("Comp","Col15a1","Ccl19", "Coch", "Cxcl12", "Fbln1", "Bmp4", "Npnt", "Hhip"),#可指定终点细胞簇
#                    reducedDim = "UMAP")
plot.new()
plot(reducedDims(sce)$UMAP,col = brewer.pal(10,'Paired')[sce$seurat_clusters],pch=16,asp=1)

lines(SlingshotDataSet(sce), lwd=2,col = 'black')

colnames(colData(sce))
##  [1] "orig.ident"        "nCount_RNA"        "nFeature_RNA"      "percent.mt"        "RNA_snn_res.0.5"   "seurat_clusters"  
##  [7] "PBMC"              "ident"             "slingshot"         "slingPseudotime_1" "slingPseudotime_2" "slingPseudotime_3"
## [13] "slingPseudotime_4"
# fit a GAM with a smooth spline term for pseudotime
library(gam)

sling.pseu<-data.frame(sce$ident,sce$slingPseudotime_1,sce$slingPseudotime_2,sce$slingPseudotime_3,sce$slingPseudotime_4)

rownames(sling.pseu)<-colnames(sce)

head(sling.pseu)
##                     sce.ident sce.slingPseudotime_1 sce.slingPseudotime_2 sce.slingPseudotime_3 sce.slingPseudotime_4
## AAACATACAACCAC-1  Naive CD4 T             6.7373710             6.6467270             6.7236016             6.8558177
## AAACATTGAGCTAC-1            B                    NA                    NA            22.0512018                    NA
## AAACATTGATCAGC-1 Memory CD4 T                    NA                    NA                    NA            10.9656304
## AAACCGTGCTTCCG-1   CD14+ Mono            23.1584569                    NA                    NA                    NA
## AAACCGTGTATGCG-1           NK             0.7451371             0.7446951             0.7457627             0.7452388
## AAACGCACTGGTAC-1 Memory CD4 T             8.7288553                    NA             7.5729890             8.9297217
t <- sling.pseu$sce.slingPseudotime_4
sling.cells <- rownames(sling.pseu)[!is.na(t)] 
head(sling.cells)
## [1] "AAACATACAACCAC-1" "AAACATTGATCAGC-1" "AAACCGTGTATGCG-1" "AAACGCACTGGTAC-1" "AAACGCTGACCAGT-1" "AAACGCTGGTTCTT-1"
table(sling.pseu[sling.cells,]$sce.ident)
## 
##  Naive CD4 T   CD14+ Mono Memory CD4 T            B        CD8 T FCGR3A+ Mono           NK           DC     Platelet 
##          203            1          375            0          308            0          154            0           14
##gene expression along pseudotime
test<-FetchData(pbmc,c("ident","CCL5"))

test<-test[sling.cells,]

test$pseu<-sling.pseu[sling.cells,]$sce.slingPseudotime_4

ggplot(test,aes(pseu,CCL5))+ geom_point(aes(colour=factor(ident)),alpha = 0.8)+ 
    geom_smooth(colour="gray30",size=1.2,fullrange = TRUE) +
    theme_bw()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Find temporally expressed genes


# Only look at the transcription factors
# Identify the variable genes by ranking all genes by their variance.

# log data
Y <- pbmc[["RNA"]]$data

allTFs<-read.table('./TFs_hg19_RcisTarget_hTF_TFDB.txt') 

allTFs<-as.character(allTFs$V1)

tf.used<-intersect(allTFs,rownames(sce))


# 取谱系中的cell数量为GMM拟合时与矩阵细胞数维度匹配
Y <- Y[tf.used, sling.cells]  # only counts for TFs

dim(Y)
## [1] 1453 1055
# Fit GAM for each gene using pseudotime as independent variable.
pseu.t <- sce$slingPseudotime_4
pseu.t <- pseu.t[!is.na(pseu.t)]

smooth.data<-array(c(0),dim = c(length(sling.cells),length(tf.used)))
rownames(smooth.data)<-sling.cells
colnames(smooth.data)<-tf.used

gam.pval<-as.data.frame(array(c(0),dim = c(length(tf.used),2)))
gam.pval[,1]<-tf.used

rownames(gam.pval)<-tf.used;
colnames(gam.pval)<-c('TF_used','p_value')
for (j in 1:length(tf.used)) {
    d <- data.frame(z=Y[j,], t.used=pseu.t)
    tmp<-gam(Y[j,] ~ s(t.used,5), data=d) 
    # gam(z ~ lo(t), data=d) #loess
    gam.pval[j,2]<-as.numeric(summary(tmp)[4][[1]][1,5])
    smooth.data[,j]<-tmp$fitted.values
}
gam.pval<-with(gam.pval,gam.pval[order(p_value),])
topgenes <- rownames(subset(gam.pval,p_value<=1e-5)) #select significantly changed genes
heatdata <- t(smooth.data[order(pseu.t, na.last = NA),topgenes]) 
max.g<-apply(heatdata,1,which.max) 
min.g<-apply(heatdata,1,which.min) 
genes<-as.data.frame(cbind(max.g,min.g))
genes<-with(genes,genes[order(max.g,min.g),])
used.genes<-as.character(rownames(genes))
print(length(used.genes))
## [1] 40
heatdata <- t(smooth.data[order(pseu.t, na.last = NA),used.genes])
dim(heatdata)
## [1]   40 1055
library(RColorBrewer)
library(plotly)
library(pheatmap)

s=3
pheatmap(heatdata,scale = "row", cluster_rows =F,cluster_cols = F,col= colorRampPalette(c("navy","white","red"))(200),#
legend = TRUE,show_rownames = T, show_colnames = F,
border_color=NA,fontsize = s,fontsize_row = s,fontsize_col = s)

# Monocle3 tutorial

expression_matrix <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_expression.rds"))
cell_metadata <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_colData.rds"))
gene_annotation <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_rowData.rds"))

cds <- new_cell_data_set(expression_matrix,
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_annotation)

cds <- preprocess_cds(cds, num_dim = 50)
cds <- align_cds(cds, alignment_group = "batch", residual_model_formula_str = "~ bg.300.loading + bg.400.loading + bg.500.1.loading + bg.500.2.loading + bg.r17.loading + bg.b01.loading + bg.b02.loading")
## Aligning cells from different batches using Batchelor.
## Please remember to cite:
##   Haghverdi L, Lun ATL, Morgan MD, Marioni JC (2018). 'Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors.' Nat. Biotechnol., 36(5), 421-427. doi: 10.1038/nbt.4091
cds <- reduce_dimension(cds)
## No preprocess_method specified, and aligned coordinates have been computed previously. Using preprocess_method = 'Aligned'
plot_cells(cds, label_groups_by_cluster=FALSE,  color_cells_by = "cell.type")
## No trajectory to plot. Has learn_graph() been called yet?
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text_repel()`).

ciliated_genes <- c("che-1",
                    "hlh-17",
                    "nhr-6",
                    "dmd-6",
                    "ceh-36",
                    "ham-1")

plot_cells(cds,
           genes=ciliated_genes,
           label_cell_groups=FALSE,
           show_trajectory_graph=FALSE)

cds <- cluster_cells(cds)
plot_cells(cds, color_cells_by = "partition")
## No trajectory to plot. Has learn_graph() been called yet?

cds <- learn_graph(cds)
##   |                                                                                                                                             |                                                                                                                                     |   0%  |                                                                                                                                             |=====================================================================================================================================| 100%
##   |                                                                                                                                             |                                                                                                                                     |   0%  |                                                                                                                                             |=====================================================================================================================================| 100%
plot_cells(cds,
           color_cells_by = "cell.type",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE)
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text_repel()`).

plot_cells(cds,
           color_cells_by = "embryo.time.bin",
           label_cell_groups=FALSE,
           label_leaves=TRUE,
           label_branch_points=TRUE,
           graph_label_size=1.5)

cds <- order_cells(cds)
## 
## Listening on http://127.0.0.1:4504
plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           graph_label_size=1.5)